To run this code in my project using the renv environment, run the following lines of code
install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile
library("ViSEAGO")
##
## Warning: replacing previous import 'data.table::set' by 'dendextend::set' when
## loading 'ViSEAGO'
## Warning: replacing previous import 'dendextend::cutree' by 'stats::cutree' when
## loading 'ViSEAGO'
## Warning: replacing previous import 'AnnotationDbi::tail' by 'utils::tail' when
## loading 'ViSEAGO'
require("topGO")
## Loading required package: topGO
## Loading required package: BiocGenerics
## Loading required package: generics
##
## Attaching package: 'generics'
## The following objects are masked from 'package:base':
##
## as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
## setequal, union
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
## unsplit, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: SparseM
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
require("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ ggplot2::annotate() masks ViSEAGO::annotate()
## ✖ stringr::boundary() masks graph::boundary()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select() masks AnnotationDbi::select()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#BiocManager::install("GO.db")
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.0.1
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.4 forcats_1.0.1 stringr_1.6.0
## [4] dplyr_1.1.4 purrr_1.2.0 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.0
## [10] tidyverse_2.0.0 topGO_2.62.0 SparseM_1.84-2
## [13] GO.db_3.22.0 AnnotationDbi_1.72.0 IRanges_2.44.0
## [16] S4Vectors_0.48.0 Biobase_2.70.0 graph_1.88.0
## [19] BiocGenerics_0.56.0 generics_0.1.4 ViSEAGO_1.24.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.1 jsonlite_2.0.0
## [4] shape_1.4.6.1 magrittr_2.0.4 farver_2.1.2
## [7] rmarkdown_2.30 GlobalOptions_0.1.2 fs_1.6.6
## [10] vctrs_0.6.5 memoise_2.0.1 RCurl_1.98-1.17
## [13] webshot_0.5.5 htmltools_0.5.8.1 progress_1.2.3
## [16] dynamicTreeCut_1.63-1 curl_7.0.0 sass_0.4.10
## [19] bslib_0.9.0 htmlwidgets_1.6.4 plyr_1.8.9
## [22] httr2_1.2.1 plotly_4.11.0 cachem_1.1.0
## [25] igraph_2.2.1 lifecycle_1.0.4 iterators_1.0.14
## [28] pkgconfig_2.0.3 Matrix_1.7-4 R6_2.6.1
## [31] fastmap_1.2.0 clue_0.3-66 digest_0.6.37
## [34] colorspace_2.1-2 RSQLite_2.4.3 seriation_1.5.8
## [37] filelock_1.0.3 timechange_0.3.0 httr_1.4.7
## [40] compiler_4.5.2 bit64_4.6.0-1 withr_3.0.2
## [43] doParallel_1.0.17 S7_0.2.0 BiocParallel_1.44.0
## [46] viridis_0.6.5 DBI_1.2.3 UpSetR_1.4.0
## [49] heatmaply_1.6.0 dendextend_1.19.1 R.utils_2.13.0
## [52] biomaRt_2.66.0 rappdirs_0.3.3 rjson_0.2.23
## [55] tools_4.5.2 R.oo_1.27.1 glue_1.8.0
## [58] DiagrammeR_1.0.11 GOSemSim_2.36.0 grid_4.5.2
## [61] cluster_2.1.8.1 fgsea_1.36.0 gtable_0.3.6
## [64] tzdb_0.5.0 R.methodsS3_1.8.2 ca_0.71.1
## [67] data.table_1.17.8 hms_1.1.4 XVector_0.50.0
## [70] foreach_1.5.2 pillar_1.11.1 yulab.utils_0.2.1
## [73] circlize_0.4.16 BiocFileCache_3.0.0 lattice_0.22-7
## [76] renv_1.1.5 bit_4.6.0 tidyselect_1.2.1
## [79] registry_0.5-1 ComplexHeatmap_2.26.0 Biostrings_2.78.0
## [82] knitr_1.50 gridExtra_2.3 Seqinfo_1.0.0
## [85] xfun_0.54 matrixStats_1.5.0 DT_0.34.0
## [88] visNetwork_2.1.4 stringi_1.8.7 lazyeval_0.2.2
## [91] yaml_2.3.10 evaluate_1.0.5 codetools_0.2-20
## [94] BiocManager_1.30.26 cli_3.6.5 jquerylib_0.1.4
## [97] dichromat_2.0-0.1 Rcpp_1.1.0 dbplyr_2.5.1
## [100] png_0.1-8 XML_3.99-0.19 parallel_4.5.2
## [103] assertthat_0.2.1 blob_1.2.4 prettyunits_1.2.0
## [106] AnnotationForge_1.52.0 bitops_1.0-9 viridisLite_0.4.2
## [109] scales_1.4.0 crayon_1.5.3 GetoptLong_1.0.5
## [112] rlang_1.1.6 cowplot_1.2.0 fastmatch_1.1-6
## [115] KEGGREST_1.50.0 TSP_1.2-5
I am going to perform functional enrichment of GO terms using ViSEAGO.
I am following this vignette: http://bioconductor.unipi.it/packages/devel/bioc/vignettes/ViSEAGO/inst/doc/ViSEAGO.html.
In the next chunk I am loading in my DESeq data. These results are ordered by adjusted p-value. As a reminder, negative LFC = higher in Oral tissue, and positive LFC = higher in Aboral tissue.
#load in DESeq results
DESeq <- read.csv("../output_RNA/differential_expression/DESeq_results.csv", header = TRUE) %>% dplyr::rename("query" ="X")
#make dataframes of just differentially expressed genes for each LFC direction
DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange < -1)
DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange > 1)
#load in annotation data
annot_tab <- read.delim("../references/annotation/protein-GO.tsv") %>% dplyr::rename(GOs = GeneOntologyIDs)
#filter annotation data for just expressed genes with GO annotations
annot_tab <- annot_tab %>% filter(query %in% DESeq$query)
annot_tab$GOs <- gsub("; ", ";", annot_tab$GOs)
annot_tab$GOs[annot_tab$GOs==""] <- NA
annot_tab <- annot_tab %>% filter(!is.na(GOs))
nrow(annot_tab)
## [1] 10654
nrow(annot_tab)/nrow(DESeq)
## [1] 0.7365874
10638/14464 genes in our dataset have GO information in this file. That is 74%.
sum(annot_tab$query %in% DE_05_Aboral$query)
## [1] 376
sum(annot_tab$query %in% DE_05_Aboral$query)/nrow(DE_05_Aboral)
## [1] 0.6811594
sum(annot_tab$query %in% DE_05_OralEpi$query)
## [1] 825
sum(annot_tab$query %in% DE_05_OralEpi$query)/nrow(DE_05_OralEpi)
## [1] 0.6584198
379/558 genes that are significantly upregulated in the Aboral tissue have annotation information. That is 68% of the genes.
796/1216 genes that are significantly upregulated in the Oral Epidermis tissue have annotation information. That is 71% of the genes.
##Get a list of GO Terms for all genes
annots <- annot_tab %>% dplyr::select(query,GOs,ProteinNames) %>% dplyr::rename("GO.terms" = GOs)
# format into the format required by ViSEAGO for custom mappings
Custom_GOs <- annots %>%
# Separate GO terms into individual rows
separate_rows(GO.terms, sep = ";") %>%
# Add necessary columns
mutate(
taxid = "pacuta",
gene_symbol = ProteinNames,
evidence = "SwissProt"
) %>%
# Rename columns
dplyr::rename(
gene_id = query,
GOID = GO.terms
) %>%
dplyr::select(taxid, gene_id, gene_symbol, GOID, evidence)
Custom_GOs_valid <- Custom_GOs %>% filter(GOID %in% keys(GO.db))
write.table(Custom_GOs_valid, "../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt",row.names = FALSE, sep = "\t", quote = FALSE,col.names=TRUE)
length(unique(Custom_GOs$gene_id))
## [1] 10654
length(unique(Custom_GOs_valid$gene_id))
## [1] 10652
We seem to have lost one gene when filtering for valid GO terms, so I need to account for that below.
Custom_Pacuta <- ViSEAGO::Custom2GO("../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt")
## 'select()' returned 1:1 mapping between keys and columns
myGENE2GO_Pacuta <- ViSEAGO::annotate(
id="pacuta",
Custom_Pacuta
)
selection <- DESeq %>% filter(query %in% Custom_GOs_valid$gene_id) %>%
mutate(DE_05_Aboral = ifelse(query %in% DE_05_Aboral$query, 1,0)) %>%
mutate(DE_05_Oral = ifelse(query %in% DE_05_OralEpi$query, 1,0)) %>%
mutate(expressed = 1)
selection_Aboral <- selection %>% pull(DE_05_Aboral) %>% as.factor()
names(selection_Aboral) <- selection %>% pull(query)
selection_Oral <- selection %>% pull(DE_05_Oral) %>% as.factor()
names(selection_Oral) <- selection %>% pull(query)
expressed <- selection %>% pull(expressed) %>% as.factor()
names(expressed) <- selection %>% pull(query)
# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)
BP_Oral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="BP",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 8795 GO terms found. )
##
## Build GO DAG topology ..........
## ( 12479 GO terms and 26992 relations. )
##
## Annotating nodes ...............
## ( 9548 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
BP_Oral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 4474 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 19: 2 nodes to be scored (0 eliminated genes)
##
## Level 18: 2 nodes to be scored (0 eliminated genes)
##
## Level 17: 8 nodes to be scored (0 eliminated genes)
##
## Level 16: 18 nodes to be scored (0 eliminated genes)
##
## Level 15: 40 nodes to be scored (0 eliminated genes)
##
## Level 14: 43 nodes to be scored (24 eliminated genes)
##
## Level 13: 72 nodes to be scored (31 eliminated genes)
##
## Level 12: 159 nodes to be scored (57 eliminated genes)
##
## Level 11: 308 nodes to be scored (72 eliminated genes)
##
## Level 10: 459 nodes to be scored (148 eliminated genes)
##
## Level 9: 589 nodes to be scored (374 eliminated genes)
##
## Level 8: 646 nodes to be scored (470 eliminated genes)
##
## Level 7: 694 nodes to be scored (583 eliminated genes)
##
## Level 6: 642 nodes to be scored (1315 eliminated genes)
##
## Level 5: 453 nodes to be scored (1702 eliminated genes)
##
## Level 4: 243 nodes to be scored (1717 eliminated genes)
##
## Level 3: 79 nodes to be scored (1874 eliminated genes)
##
## Level 2: 16 nodes to be scored (1874 eliminated genes)
##
## Level 1: 1 nodes to be scored (1874 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list( Oral_elim = c("BP_Oral", "elim_Oral")))
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 825
## feasible_genes: 9548
## feasible_genes_significant: 717
## genes_nodeSize: 5
## nodes_number: 6666
## edges_number: 14007
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6666
## GO_significant: 63
## feasible_genes: 9548
## feasible_genes_significant: 717
## genes_nodeSize: 5
## Nontrivial_nodes: 4474
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 63 GO terms of 1 conditions.
## Oral_elim : 63 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral.tsv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 825
## feasible_genes: 9548
## feasible_genes_significant: 717
## genes_nodeSize: 5
## nodes_number: 6666
## edges_number: 14007
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6666
## GO_significant: 63
## feasible_genes: 9548
## feasible_genes_significant: 717
## genes_nodeSize: 5
## Nontrivial_nodes: 4474
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 63 GO terms of 1 conditions.
## Oral_elim : 63 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Oral <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the dendextend package.
## Please report the issue at <https://github.com/talgalili/dendextend/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
"GOterms")
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
height=1000,
width=700,
"GOterms", file="../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.png")
## quartz_off_screen
## 2
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Oral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2_Oral,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.tsv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Oral,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Oral<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2_Oral,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Oral,
"GOclusters")
Wang_clusters_wardD2_Oral_init <- Wang_clusters_wardD2_Oral
# GOclusters heatmap
Wang_clusters_wardD2_Oral <-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2_Oral_init,
tree=list(
distance="BMA",
aggreg.method="ward.D2",
rotate=labels(as.dendrogram(hclust(Wang_clusters_wardD2_Oral_init@clusters_dist[["BMA"]],
method ="ward.D2")))[c(2,3,1,4,5,10,11,13,14,12,7,8,9,6)]
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
"GOclusters")
DE_05_SwissProt <- read.csv("../output_RNA/differential_expression/DE_05_SwissProt_annotation.csv")
DESeq_SwissProt <- read.csv("../output_RNA/differential_expression/DESeq_SwissProt_annotation.csv")
Oral_enriched_clustered <- Wang_clusters_wardD2_Oral@enrich_GOs@data
top_cluster <- Oral_enriched_clustered %>% arrange(Oral_elim.pvalue) %>% head(1) %>% pull(GO.cluster)
Oral_enriched_clustered %>% dplyr::filter(GO.cluster == top_cluster)
## GO.cluster IC GO.ID
## <char> <num> <char>
## 1: 9 6.41 GO:1901607
## 2: 9 9.26 GO:0046314
## 3: 9 8.86 GO:0051923
## 4: 9 9.11 GO:0050427
## 5: 9 8.16 GO:0009208
## term
## <char>
## 1: alpha-amino acid biosynthetic process
## 2: phosphocreatine biosynthetic process
## 3: sulfation
## 4: 3'-phosphoadenosine 5'-phosphosulfate metabolic process
## 5: pyrimidine ribonucleoside triphosphate metabolic process
## definition
## <char>
## 1: <NA>
## 2: The chemical reactions and pathways resulting in the formation of phosphocreatine, a phosphagen of creatine which is synthesized and broken down by creatine phosphokinase.
## 3: The addition of a sulfate group to a molecule.
## 4: The chemical reactions and pathways involving 3'-phosphoadenosine 5'-phosphosulfate, a naturally occurring mixed anhydride. It is an intermediate in the formation of a variety of sulfo compounds in biological systems.
## 5: The chemical reactions and pathways involving pyrimidine ribonucleoside triphosphate, a compound consisting of a pyrimidine base linked to a ribose sugar esterified with triphosphate on the sugar.
## Oral_elim.genes_frequency Oral_elim.pvalue Oral_elim.-log10_pvalue
## <char> <num> <num>
## 1: 16.867% (14/83) 3.281350e-03 2.48
## 2: 66.667% (4/6) 4.184989e-04 3.38
## 3: 44.444% (4/9) 2.926368e-03 2.53
## 4: 85.714% (6/7) 1.152521e-06 5.94
## 5: 36.364% (4/11) 6.788097e-03 2.17
## Oral_elim.Significant_genes
## <char>
## 1: Pocillopora_acuta_HIv2___RNAseq.g10854.t2;Pocillopora_acuta_HIv2___RNAseq.g12417.t1;Pocillopora_acuta_HIv2___RNAseq.g1504.t1;Pocillopora_acuta_HIv2___RNAseq.g16699.t1;Pocillopora_acuta_HIv2___RNAseq.g19367.t1;Pocillopora_acuta_HIv2___RNAseq.g23634.t1;Pocillopora_acuta_HIv2___RNAseq.g24790.t1;Pocillopora_acuta_HIv2___RNAseq.g27978.t1;Pocillopora_acuta_HIv2___RNAseq.g28641.t1;Pocillopora_acuta_HIv2___RNAseq.g7498.t1;Pocillopora_acuta_HIv2___TS.g19253.t1;Pocillopora_acuta_HIv2___TS.g3401.t1;Pocillopora_acuta_HIv2___TS.g6241.t1e;Pocillopora_acuta_HIv2___TS.g7609.t1b
## 2: Pocillopora_acuta_HIv2___RNAseq.g15574.t1;Pocillopora_acuta_HIv2___RNAseq.g15577.t1;Pocillopora_acuta_HIv2___RNAseq.g15584.t1;Pocillopora_acuta_HIv2___RNAseq.g2258.t1
## 3: Pocillopora_acuta_HIv2___RNAseq.10969_t;Pocillopora_acuta_HIv2___RNAseq.g21538.t1;Pocillopora_acuta_HIv2___RNAseq.g9090.t1;Pocillopora_acuta_HIv2___RNAseq.g9093.t1
## 4: Pocillopora_acuta_HIv2___RNAseq.10969_t;Pocillopora_acuta_HIv2___RNAseq.g12481.t1;Pocillopora_acuta_HIv2___RNAseq.g21538.t1;Pocillopora_acuta_HIv2___RNAseq.g25759.t1;Pocillopora_acuta_HIv2___RNAseq.g9089.t1;Pocillopora_acuta_HIv2___RNAseq.g9093.t1
## 5: Pocillopora_acuta_HIv2___RNAseq.g11840.t1;Pocillopora_acuta_HIv2___RNAseq.g21884.t1;Pocillopora_acuta_HIv2___RNAseq.g3911.t1;Pocillopora_acuta_HIv2___TS.g19040.t1
## Oral_elim.Significant_genes_symbol
## <char>
## 1: Phenylalanine-4-hydroxylase (PAH) (EC 1.14.16.1) (Phe-4-monooxygenase);Cystathionine gamma-lyase (CGL) (CSE) (EC 4.4.1.1) (Cysteine desulfhydrase) (Cysteine-protein sulfhydrase) (Gamma-cystathionase) (Homocysteine desulfhydrase) (EC 4.4.1.2);L-threonine ammonia-lyase (EC 4.3.1.19) (L-serine ammonia-lyase) (EC 4.3.1.17) (Threonine dehydratase);Glutathione hydrolase 1 proenzyme (EC 3.4.19.13) (Gamma-glutamyltransferase 1) (Gamma-glutamyltranspeptidase 1) (GGT 1) (EC 2.3.2.2) (Leukotriene-C4 hydrolase) (EC 3.4.19.14) (CD antigen CD224) [Cleaved into: Glutathione hydrolase 1 heavy chain; Glutathione hydrolase 1 light chain];Methylenetetrahydrofolate reductase (NADPH) (EC 1.5.1.53);Acireductone dioxygenase (Acireductone dioxygenase (Fe(2+)-requiring)) (ARD') (Fe-ARD) (EC 1.13.11.54) (Acireductone dioxygenase (Ni(2+)-requiring)) (ARD) (Ni-ARD) (EC 1.13.11.53);Death domain-containing ATP nucleosidase (DDAN) (EC 3.5.99.-);Plasma membrane calcium-transporting ATPase 4 (PMCA4) (EC 7.2.2.10) (Matrix-remodeling-associated protein 1) (Plasma membrane calcium ATPase isoform 4) (Plasma membrane calcium pump isoform 4);Protein TabA;Branched-chain-amino-acid aminotransferase (EC 2.6.1.42);Delta-1-pyrroline-5-carboxylate synthase (P5CS) (Aldehyde dehydrogenase family 18 member A1) [Includes: Glutamate 5-kinase (GK) (EC 2.7.2.11) (Gamma-glutamyl kinase); Gamma-glutamyl phosphate reductase (GPR) (EC 1.2.1.41) (Glutamate-5-semialdehyde dehydrogenase) (Glutamyl-gamma-semialdehyde dehydrogenase)];Asparagine synthetase domain-containing protein 1;Glutamate dehydrogenase (GDH) (EC 1.4.1.3) (NAD(P)H-utilizing glutamate dehydrogenase);Phosphoserine phosphatase (PSP) (PSPase) (EC 3.1.3.3) (O-phosphoserine phosphohydrolase)
## 2: Creatine kinase, testis isozyme (EC 2.7.3.2);Creatine kinase M-type (EC 2.7.3.2) (Creatine kinase M chain) (Creatine phosphokinase M-type) (CPK-M) (M-CK);Creatine kinase M-type (EC 2.7.3.2) (Creatine kinase M chain) (Creatine phosphokinase M-type) (CPK-M) (M-CK);Arginine kinase (AK) (EC 2.7.3.3)
## 3: Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1C2A (ST1C2A) (rSULT1C2A) (EC 2.8.2.1) (Sulfotransferase K2);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1) (Sulfotransferase 1C2) (SULT1C#2)
## 4: Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1);Sulfotransferase 1B1 (ST1B1) (EC 2.8.2.1) (Sulfotransferase 1B2) (Sulfotransferase family cytosolic 1B member 1) (Thyroid hormone sulfotransferase);Bifunctional 3'-phosphoadenosine 5'-phosphosulfate synthase (PAPS synthase) (PAPS synthetase) (PAPSS) (Sulfurylase kinase) (SK) [Includes: Sulfate adenylyltransferase (EC 2.7.7.4) (ATP-sulfurylase) (Sulfate adenylate transferase) (SAT); Adenylyl-sulfate kinase (EC 2.7.1.25) (3'-phosphoadenosine-5'-phosphosulfate synthase) (APS kinase) (Adenosine-5'-phosphosulfate 3'-phosphotransferase) (Adenylylsulfate 3'-phosphotransferase)];Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1);Sulfotransferase 1C4 (ST1C4) (EC 2.8.2.1) (Sulfotransferase 1C2) (SULT1C#2)
## 5: CTP synthase 1 (EC 6.3.4.2) (CTP synthetase 1) (UTP--ammonia ligase 1);Ectonucleoside triphosphate diphosphohydrolase 4 (NTPDase 4) (EC 3.6.1.15) (EC 3.6.1.6) (Golgi UDPase) (Lysosomal apyrase-like protein of 70 kDa) (Uridine-diphosphatase) (UDPase) (EC 3.6.1.42);Nucleoside diphosphate kinase homolog 5 (3'-5' exonuclease NME5) (EC 3.1.-.-);GTP:AMP phosphotransferase AK3, mitochondrial (EC 2.7.4.10) (Adenylate kinase 3) (Adenylate kinase 3 alpha-like 1) (Adenylate kinase isozyme 3)
BP_go_terms <- BP_Oral@graph@nodes
genes_by_GO <- genesInTerm(BP_Oral, BP_go_terms)
##### Clusters 1-3: development
cluster_GOs <- Oral_enriched_clustered %>% dplyr::filter(GO.cluster == 1|
GO.cluster == 2|
GO.cluster == 3) %>% pull(GO.ID)
#showSigOfNodes(BP_Oral, score(elim_Oral)[names(score(elim_Oral)) %in% cluster_GOs], firstSigNodes = length(cluster_GOs), useInfo = 'all')
cluster_genes <- unique(unlist(genes_by_GO[cluster_GOs]))
cluster_1_3_genes_DE <- DE_05_SwissProt %>% filter(query %in% cluster_genes)
write.csv(cluster_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Oral_enrich_BP_development.csv",row.names=FALSE)
##### Clusters 4-5: Transport
cluster_GOs <- Oral_enriched_clustered %>% dplyr::filter(GO.cluster == 4|
GO.cluster == 5) %>% pull(GO.ID)
cluster_genes <- unique(unlist(genes_by_GO[cluster_GOs]))
cluster_4_5_genes_DE <- DE_05_SwissProt %>% filter(query %in% cluster_genes)
write.csv(cluster_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Oral_enrich_BP_transport.csv",row.names=FALSE)
##### Clusters 6-8: Signaling + secretion
cluster_GOs <- Oral_enriched_clustered %>% dplyr::filter(GO.cluster == 6|
GO.cluster == 7|
GO.cluster == 8) %>% pull(GO.ID)
cluster_genes <- unique(unlist(genes_by_GO[cluster_GOs]))
cluster_6_8_genes_DE <- DE_05_SwissProt %>% filter(query %in% cluster_genes)
write.csv(cluster_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Oral_enrich_BP_signaling_secretion.csv",row.names=FALSE)
##### Cluster 12: Adhesion
cluster_12 <- Oral_enriched_clustered %>% dplyr::filter(GO.cluster == 12) %>% pull(GO.ID)
cluster_12_genes <- unique(unlist(genes_by_GO[cluster_12]))
cluster_12_genes_DE <- DE_05_SwissProt %>% filter(query %in% cluster_12_genes)
write.csv(cluster_12_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Oral_enrich_BP_adhesion.csv",row.names=FALSE)
##### Clusters 9-10: metabolic
cluster_GOs <- Oral_enriched_clustered %>% dplyr::filter(GO.cluster == 9|
GO.cluster == 10) %>% pull(GO.ID)
cluster_genes <- unique(unlist(genes_by_GO[cluster_GOs]))
cluster_9_10_genes_DE <- DE_05_SwissProt %>% filter(query %in% cluster_genes)
write.csv(cluster_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Oral_enrich_BP_metabolic.csv",row.names=FALSE)
##### Clusters 11-13-14: stimulus_immune
cluster_GOs <- Oral_enriched_clustered %>% dplyr::filter(GO.cluster == 6|
GO.cluster == 7|
GO.cluster == 8|GO.cluster == 11|
GO.cluster == 13|
GO.cluster == 14) %>% pull(GO.ID)
cluster_genes <- unique(unlist(genes_by_GO[cluster_GOs]))
cluster_11_13_14_genes_DE <- DE_05_SwissProt %>% filter(query %in% cluster_genes)
write.csv(cluster_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Oral_enrich_BP_stimulus_immune.csv",row.names=FALSE)
#DE_05_SwissProt %>% filter(query %in%unique(unlist(genes_by_GO[Aboral_enriched_clustered %>% pull(GO.ID)])))
# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)
BP_Aboral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="BP",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 8795 GO terms found. )
##
## Build GO DAG topology ..........
## ( 12479 GO terms and 26992 relations. )
##
## Annotating nodes ...............
## ( 9548 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
BP_Aboral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 3054 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 17: 1 nodes to be scored (0 eliminated genes)
##
## Level 16: 6 nodes to be scored (0 eliminated genes)
##
## Level 15: 20 nodes to be scored (0 eliminated genes)
##
## Level 14: 24 nodes to be scored (0 eliminated genes)
##
## Level 13: 42 nodes to be scored (164 eliminated genes)
##
## Level 12: 81 nodes to be scored (230 eliminated genes)
##
## Level 11: 179 nodes to be scored (232 eliminated genes)
##
## Level 10: 280 nodes to be scored (252 eliminated genes)
##
## Level 9: 375 nodes to be scored (464 eliminated genes)
##
## Level 8: 449 nodes to be scored (610 eliminated genes)
##
## Level 7: 469 nodes to be scored (745 eliminated genes)
##
## Level 6: 482 nodes to be scored (1275 eliminated genes)
##
## Level 5: 358 nodes to be scored (2195 eliminated genes)
##
## Level 4: 193 nodes to be scored (2392 eliminated genes)
##
## Level 3: 78 nodes to be scored (2477 eliminated genes)
##
## Level 2: 16 nodes to be scored (2635 eliminated genes)
##
## Level 1: 1 nodes to be scored (2635 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list(Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 376
## feasible_genes: 9548
## feasible_genes_significant: 326
## genes_nodeSize: 5
## nodes_number: 6666
## edges_number: 14007
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6666
## GO_significant: 123
## feasible_genes: 9548
## feasible_genes_significant: 326
## genes_nodeSize: 5
## Nontrivial_nodes: 3054
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 123 GO terms of 1 conditions.
## Aboral_elim : 123 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral.tsv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 376
## feasible_genes: 9548
## feasible_genes_significant: 326
## genes_nodeSize: 5
## nodes_number: 6666
## edges_number: 14007
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6666
## GO_significant: 123
## feasible_genes: 9548
## feasible_genes_significant: 326
## genes_nodeSize: 5
## Nontrivial_nodes: 3054
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 123 GO terms of 1 conditions.
## Aboral_elim : 123 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Aboral <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 5
)
)
),
samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Aboral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2_Aboral,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.tsv"
)
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
height=1500,
width=700,
"GOterms", file="../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.png")
## quartz_off_screen
## 2
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Aboral,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Aboral<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2_Aboral,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Aboral,
"GOclusters")
Wang_clusters_wardD2_Aboral_init <- Wang_clusters_wardD2_Aboral
# GOclusters heatmap
Wang_clusters_wardD2_Aboral <-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2_Aboral_init,
tree=list(
distance="BMA",
aggreg.method="ward.D2",
rotate=labels(as.dendrogram(hclust(
Wang_clusters_wardD2_Aboral_init@clusters_dist[["BMA"]],
method ="ward.D2")))[c(1:8,11,12,9:10,13:16)]
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOclusters")
Aboral_enriched_clustered <- Wang_clusters_wardD2_Aboral@enrich_GOs@data
top_cluster <- Aboral_enriched_clustered %>% arrange(Aboral_elim.pvalue) %>% head(1) %>% pull(GO.cluster)
Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == top_cluster)
## GO.cluster IC GO.ID
## <char> <num> <char>
## 1: 2 4.17 GO:0007155
## 2: 2 8.35 GO:0016339
## 3: 2 6.47 GO:0007156
## 4: 2 7.69 GO:0007157
## 5: 2 6.28 GO:0007528
## 6: 2 7.84 GO:0099560
## term
## <char>
## 1: cell adhesion
## 2: calcium-dependent cell-cell adhesion via plasma membrane cell adhesion molecules
## 3: homophilic cell adhesion via plasma membrane adhesion molecules
## 4: heterophilic cell-cell adhesion via plasma membrane cell adhesion molecules
## 5: neuromuscular junction development
## 6: synaptic membrane adhesion
## definition
## <char>
## 1: The attachment of a cell, either to another cell or to an underlying substrate such as the extracellular matrix, via cell adhesion molecules.
## 2: The attachment of one cell to another cell via adhesion molecules that require the presence of calcium for the interaction.
## 3: The attachment of a plasma membrane adhesion molecule in one cell to an identical molecule in an adjacent cell.
## 4: The attachment of an adhesion molecule in one cell to a nonidentical adhesion molecule in an adjacent cell.
## 5: A process that is carried out at the cellular level which results in the assembly, arrangement of constituent parts, or disassembly of a neuromuscular junction.
## 6: The attachment of presynaptic membrane to postsynaptic membrane via adhesion molecules that are at least partially embedded in the plasma membrane.
## Aboral_elim.genes_frequency Aboral_elim.pvalue Aboral_elim.-log10_pvalue
## <char> <num> <num>
## 1: 10.201% (66/647) 6.202129e-04 3.21
## 2: 33.333% (5/15) 1.018822e-04 3.99
## 3: 19.388% (19/98) 6.010280e-10 9.22
## 4: 37.931% (11/29) 1.240954e-09 8.91
## 5: 10% (8/80) 5.799100e-03 2.24
## 6: 29.167% (7/24) 1.063238e-05 4.97
## Aboral_elim.Significant_genes
## <char>
## 1: Pocillopora_acuta_HIv2___RNAseq.2533_t;Pocillopora_acuta_HIv2___RNAseq.6832_t;Pocillopora_acuta_HIv2___RNAseq.g10343.t1;Pocillopora_acuta_HIv2___RNAseq.g10385.t1;Pocillopora_acuta_HIv2___RNAseq.g1119.t1;Pocillopora_acuta_HIv2___RNAseq.g11876.t1;Pocillopora_acuta_HIv2___RNAseq.g12258.t1;Pocillopora_acuta_HIv2___RNAseq.g12272.t1;Pocillopora_acuta_HIv2___RNAseq.g12278.t1;Pocillopora_acuta_HIv2___RNAseq.g12281.t1;Pocillopora_acuta_HIv2___RNAseq.g12374.t1;Pocillopora_acuta_HIv2___RNAseq.g12987.t1;Pocillopora_acuta_HIv2___RNAseq.g13823.t1;Pocillopora_acuta_HIv2___RNAseq.g14253.t1;Pocillopora_acuta_HIv2___RNAseq.g14347.t1;Pocillopora_acuta_HIv2___RNAseq.g14375.t1;Pocillopora_acuta_HIv2___RNAseq.g16484.t1;Pocillopora_acuta_HIv2___RNAseq.g17517.t1;Pocillopora_acuta_HIv2___RNAseq.g17519.t1;Pocillopora_acuta_HIv2___RNAseq.g18125.t1;Pocillopora_acuta_HIv2___RNAseq.g18385.t1;Pocillopora_acuta_HIv2___RNAseq.g19085.t1;Pocillopora_acuta_HIv2___RNAseq.g20876.t1;Pocillopora_acuta_HIv2___RNAseq.g21477.t1;Pocillopora_acuta_HIv2___RNAseq.g21479.t1;Pocillopora_acuta_HIv2___RNAseq.g21481.t1;Pocillopora_acuta_HIv2___RNAseq.g23886.t1;Pocillopora_acuta_HIv2___RNAseq.g24688.t1;Pocillopora_acuta_HIv2___RNAseq.g25351.t1;Pocillopora_acuta_HIv2___RNAseq.g25739.t1;Pocillopora_acuta_HIv2___RNAseq.g25761.t1;Pocillopora_acuta_HIv2___RNAseq.g27031.t1;Pocillopora_acuta_HIv2___RNAseq.g28109.t1;Pocillopora_acuta_HIv2___RNAseq.g28196.t1;Pocillopora_acuta_HIv2___RNAseq.g28226.t2;Pocillopora_acuta_HIv2___RNAseq.g28502.t1;Pocillopora_acuta_HIv2___RNAseq.g28971.t1a;Pocillopora_acuta_HIv2___RNAseq.g29143.t2;Pocillopora_acuta_HIv2___RNAseq.g29795.t1;Pocillopora_acuta_HIv2___RNAseq.g3826.t1;Pocillopora_acuta_HIv2___RNAseq.g3939.t1;Pocillopora_acuta_HIv2___RNAseq.g5181.t1;Pocillopora_acuta_HIv2___RNAseq.g5342.t1;Pocillopora_acuta_HIv2___RNAseq.g5896.t1;Pocillopora_acuta_HIv2___RNAseq.g682.t1;Pocillopora_acuta_HIv2___RNAseq.g7895.t1;Pocillopora_acuta_HIv2___RNAseq.g805.t1;Pocillopora_acuta_HIv2___RNAseq.g8148.t1;Pocillopora_acuta_HIv2___RNAseq.g9049.t1;Pocillopora_acuta_HIv2___RNAseq.g9253.t1;Pocillopora_acuta_HIv2___TS.g10825.t1;Pocillopora_acuta_HIv2___TS.g11002.t4;Pocillopora_acuta_HIv2___TS.g12860.t1;Pocillopora_acuta_HIv2___TS.g14528.t1a;Pocillopora_acuta_HIv2___TS.g15590.t1;Pocillopora_acuta_HIv2___TS.g17199.t1;Pocillopora_acuta_HIv2___TS.g23602.t1;Pocillopora_acuta_HIv2___TS.g24030.t1;Pocillopora_acuta_HIv2___TS.g24743.t1;Pocillopora_acuta_HIv2___TS.g30706.t1;Pocillopora_acuta_HIv2___TS.g30914.t1;Pocillopora_acuta_HIv2___TS.g3265.t1;Pocillopora_acuta_HIv2___TS.g5115.t2;Pocillopora_acuta_HIv2___TS.g5308.t2;Pocillopora_acuta_HIv2___TS.g8258.t1;Pocillopora_acuta_HIv2___TS.g8259.t1b
## 2: Pocillopora_acuta_HIv2___RNAseq.g16484.t1;Pocillopora_acuta_HIv2___RNAseq.g17517.t1;Pocillopora_acuta_HIv2___RNAseq.g17519.t1;Pocillopora_acuta_HIv2___TS.g30706.t1;Pocillopora_acuta_HIv2___TS.g5308.t2
## 3: Pocillopora_acuta_HIv2___RNAseq.2533_t;Pocillopora_acuta_HIv2___RNAseq.g10343.t1;Pocillopora_acuta_HIv2___RNAseq.g12987.t1;Pocillopora_acuta_HIv2___RNAseq.g14253.t1;Pocillopora_acuta_HIv2___RNAseq.g16484.t1;Pocillopora_acuta_HIv2___RNAseq.g17517.t1;Pocillopora_acuta_HIv2___RNAseq.g17519.t1;Pocillopora_acuta_HIv2___RNAseq.g19085.t1;Pocillopora_acuta_HIv2___RNAseq.g29143.t2;Pocillopora_acuta_HIv2___RNAseq.g5181.t1;Pocillopora_acuta_HIv2___RNAseq.g682.t1;Pocillopora_acuta_HIv2___RNAseq.g805.t1;Pocillopora_acuta_HIv2___RNAseq.g8148.t1;Pocillopora_acuta_HIv2___TS.g15590.t1;Pocillopora_acuta_HIv2___TS.g23602.t1;Pocillopora_acuta_HIv2___TS.g24743.t1;Pocillopora_acuta_HIv2___TS.g30706.t1;Pocillopora_acuta_HIv2___TS.g5308.t2;Pocillopora_acuta_HIv2___TS.g8258.t1
## 4: Pocillopora_acuta_HIv2___RNAseq.2533_t;Pocillopora_acuta_HIv2___RNAseq.g12987.t1;Pocillopora_acuta_HIv2___RNAseq.g14253.t1;Pocillopora_acuta_HIv2___RNAseq.g28971.t1a;Pocillopora_acuta_HIv2___RNAseq.g8148.t1;Pocillopora_acuta_HIv2___RNAseq.g9253.t1;Pocillopora_acuta_HIv2___TS.g15590.t1;Pocillopora_acuta_HIv2___TS.g23602.t1;Pocillopora_acuta_HIv2___TS.g30706.t1;Pocillopora_acuta_HIv2___TS.g30914.t1;Pocillopora_acuta_HIv2___TS.g3265.t1
## 5: Pocillopora_acuta_HIv2___RNAseq.g21477.t1;Pocillopora_acuta_HIv2___RNAseq.g21479.t1;Pocillopora_acuta_HIv2___RNAseq.g21481.t1;Pocillopora_acuta_HIv2___RNAseq.g614.t1;Pocillopora_acuta_HIv2___RNAseq.g8914.t1;Pocillopora_acuta_HIv2___RNAseq.g8916.t1;Pocillopora_acuta_HIv2___TS.g18622.t1b;Pocillopora_acuta_HIv2___TS.g29912.t2
## 6: Pocillopora_acuta_HIv2___RNAseq.g12258.t1;Pocillopora_acuta_HIv2___RNAseq.g12281.t1;Pocillopora_acuta_HIv2___RNAseq.g805.t1;Pocillopora_acuta_HIv2___RNAseq.g9253.t1;Pocillopora_acuta_HIv2___TS.g30914.t1;Pocillopora_acuta_HIv2___TS.g3265.t1;Pocillopora_acuta_HIv2___TS.g8259.t1b
## Aboral_elim.Significant_genes_symbol
## <char>
## 1: Hemicentin-1 (Fibulin-6) (FIBL-6);Protein turtle homolog B (Immunoglobulin superfamily member 9B) (IgSF9B);Protocadherin Fat 3 (FAT tumor suppressor homolog 3);Fibroblast growth factor receptor-like 1 (FGF receptor-like protein 1) (FGF homologous factor receptor) (FGFR-like protein) (Fibroblast growth factor receptor 5) (FGFR-5);Collagen alpha-6(VI) chain;Mucin-like protein;Receptor-type tyrosine-protein phosphatase S (R-PTP-S) (EC 3.1.3.48) (Chick receptor tyrosine phosphatase alpha) (CRYP alpha) (CRYPalpha);Receptor-type tyrosine-protein phosphatase alpha (Protein-tyrosine phosphatase alpha) (R-PTP-alpha) (EC 3.1.3.48) (LCA-related phosphatase) (PTPTY-28);Receptor-type tyrosine-protein phosphatase alpha (Protein-tyrosine phosphatase alpha) (R-PTP-alpha) (EC 3.1.3.48) (LCA-related phosphatase) (PTPTY-28);Receptor-type tyrosine-protein phosphatase F (EC 3.1.3.48) (Leukocyte common antigen related) (LAR);Tissue inhibitor of metalloproteinase;Protocadherin Fat 4 (hFat4) (Cadherin family member 14) (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Mucin-like protein;Hemicentin-1 (Fibulin-6) (FIBL-6);Collagen alpha-4(VI) chain;Collagen alpha-5(VI) chain (Collagen alpha-1(XXIX) chain);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Melanotransferrin (Membrane-bound transferrin-like protein p97) (MTf) (CD antigen CD228);BAG family molecular chaperone regulator 4 (BAG-4) (Bcl-2-associated athanogene 4) (Silencer of death domains);Halomucin;Melanotransferrin (Membrane-bound transferrin-like protein p97) (MTf) (CD antigen CD228);Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Laminin subunit beta-1 (Laminin B1 chain) (Laminin-1 subunit beta) (Laminin-10 subunit beta) (Laminin-12 subunit beta) (Laminin-2 subunit beta) (Laminin-6 subunit beta) (Laminin-8 subunit beta);Neurexin-1 (Neurexin I-alpha) (Neurexin-1-alpha);Mammalian ependymin-related protein 1 (MERP-1);Ephrin-B2 (ELF-2) (EPH-related receptor tyrosine kinase ligand 5) (LERK-5) (HTK ligand) (HTK-L);Collagen alpha-6(VI) chain;Dystonin (Bullous pemphigoid antigen 1) (BPA) (Dystonia musculorum protein) (Hemidesmosomal plaque protein) (Microtubule actin cross-linking factor 2);Fibulin-2 (FIBL-2);Aggrecan core protein (Cartilage-specific proteoglycan core protein) (CSPCP) (Chondroitin sulfate proteoglycan core protein 1) (Chondroitin sulfate proteoglycan 1) [Cleaved into: Aggrecan core protein 2];Neurogenic locus Notch protein [Cleaved into: Processed neurogenic locus Notch protein];Mucin-like protein;Uromodulin (Tamm-Horsfall urinary glycoprotein) (THP) [Cleaved into: Uromodulin, secreted form];Roundabout homolog 2;Zonadhesin;Testican-2 (SPARC/osteonectin, CWCV, and Kazal-like domains proteoglycan 2);Spondin-1 (F-spondin);Roundabout homolog 2;Protein jagged-1 (Jagged1) (hJ1) (CD antigen CD339);Probable N-acetyltransferase camello (EC 2.3.1.-) (Xcml);Roundabout homolog 1;Protein jagged-1 (Jagged1) (CD antigen CD339);Protein turtle homolog B (Immunoglobulin superfamily member 9B);Hemicentin-1 (Fibulin-6) (FIBL-6);Allograft inflammatory factor 1 (AIF-1) (Ionized calcium-binding adapter molecule 1) (Protein G1);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Thrombospondin type-1 domain-containing protein 1 (Transmembrane molecule with thrombospondin module);Neurogenic locus Notch protein [Cleaved into: Processed neurogenic locus Notch protein];Collagen alpha-1(VI) chain;Peroxidasin homolog (EC 1.11.2.-) (Melanoma-associated antigen MG50) (Peroxidasin 1) (hsPxd01) (Vascular peroxidase 1) (p53-responsive gene 2 protein) [Cleaved into: PXDN active fragment];Hemicentin-1 (Fibulin-6) (FIBL-6);Laminin subunit alpha-1 (Laminin A chain) (Laminin-1 subunit alpha) (Laminin-3 subunit alpha) (S-laminin subunit alpha) (S-LAM alpha);Protocadherin Fat 4 (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Neural cell adhesion molecule 2 (N-CAM-2) (NCAM-2) (Neural cell adhesion molecule RB-8) (R4B12);Hemicentin-2;Cadherin-related tumor suppressor (Protein fat) [Cleaved into: Ft-mito];Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Aggrecan core protein (Cartilage-specific proteoglycan core protein) (CSPCP);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Hemicentin-2;Neural cell adhesion molecule 1 (N-CAM-1) (NCAM-1)
## 2: Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Cadherin-related tumor suppressor (Protein fat) [Cleaved into: Ft-mito];Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14)
## 3: Hemicentin-1 (Fibulin-6) (FIBL-6);Protocadherin Fat 3 (FAT tumor suppressor homolog 3);Protocadherin Fat 4 (hFat4) (Cadherin family member 14) (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Hemicentin-1 (Fibulin-6) (FIBL-6);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Halomucin;Roundabout homolog 2;Roundabout homolog 2;Roundabout homolog 1;Protein turtle homolog B (Immunoglobulin superfamily member 9B);Hemicentin-1 (Fibulin-6) (FIBL-6);Hemicentin-1 (Fibulin-6) (FIBL-6);Protocadherin Fat 4 (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Hemicentin-2;Cadherin-related tumor suppressor (Protein fat) [Cleaved into: Ft-mito];Tyrosine kinase receptor Cad96Ca (EC 2.7.10.1) (Cadherin-96Ca) (Tyrosine kinase receptor HD-14);Hemicentin-2
## 4: Hemicentin-1 (Fibulin-6) (FIBL-6);Protocadherin Fat 4 (hFat4) (Cadherin family member 14) (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Hemicentin-1 (Fibulin-6) (FIBL-6);Uromodulin (Tamm-Horsfall urinary glycoprotein) (THP) [Cleaved into: Uromodulin, secreted form];Hemicentin-1 (Fibulin-6) (FIBL-6);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Hemicentin-1 (Fibulin-6) (FIBL-6);Protocadherin Fat 4 (FAT tumor suppressor homolog 4) (Fat-like cadherin protein FAT-J);Cadherin-related tumor suppressor (Protein fat) [Cleaved into: Ft-mito];Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48)
## 5: Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Tenascin (TN) (Cytotactin) (GMEM) (GP 150-225) (Glioma-associated-extracellular matrix antigen) (Hexabrachion) (JI) (Myotendinous antigen) (Neuronectin) (Tenascin-C) (TN-C);Homeobox protein SIX4 (Sine oculis homeobox homolog 4) (Skeletal muscle-specific ARE-binding protein AREC3);Low-density lipoprotein receptor-related protein 4 (LRP-4) (Multiple epidermal growth factor-like domains 7);Low-density lipoprotein receptor-related protein 4 (LRP-4) (Multiple epidermal growth factor-like domains 7);Agrin [Cleaved into: Agrin N-terminal 110 kDa subunit; Agrin C-terminal 110 kDa subunit; Agrin C-terminal 90 kDa fragment (C90); Agrin C-terminal 22 kDa fragment (C22)];E3 ubiquitin-protein ligase PDZRN3-B (EC 2.3.2.27) (PDZ domain-containing RING finger protein 3-B)
## 6: Receptor-type tyrosine-protein phosphatase S (R-PTP-S) (EC 3.1.3.48) (Chick receptor tyrosine phosphatase alpha) (CRYP alpha) (CRYPalpha);Receptor-type tyrosine-protein phosphatase F (EC 3.1.3.48) (Leukocyte common antigen related) (LAR);Protein turtle homolog B (Immunoglobulin superfamily member 9B);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Receptor-type tyrosine-protein phosphatase delta (Protein-tyrosine phosphatase delta) (R-PTP-delta) (EC 3.1.3.48);Neural cell adhesion molecule 1 (N-CAM-1) (NCAM-1)
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOterms")
BP_go_terms <- BP_Aboral@graph@nodes
genes_by_GO <- genesInTerm(BP_Aboral, BP_go_terms)
#### Clusters 9-16: Developmental
cluster_9_16 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 9|
GO.cluster == 10|
GO.cluster == 11|
GO.cluster == 12|
GO.cluster == 13|
GO.cluster == 14|
GO.cluster == 15|
GO.cluster == 16) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_9_16], firstSigNodes = length(cluster_9_16), useInfo = 'all')
## Loading required package: Rgraphviz
## Loading required package: grid
##
## Attaching package: 'grid'
## The following object is masked from 'package:topGO':
##
## depth
##
## Attaching package: 'Rgraphviz'
## The following objects are masked from 'package:IRanges':
##
## from, to
## The following objects are masked from 'package:S4Vectors':
##
## from, to
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(head_from[1], head_from[2], head_to[1], head_to[2], col =
## edgeColor, : zero-length arrow is of indeterminate angle and so skipped
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 350
## Number of Edges = 683
##
## $complete.dag
## [1] "A graph with 350 nodes."
cluster_9_16_genes <- unique(unlist(genes_by_GO[cluster_9_16]))
cluster_9_16_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_9_16_genes)
write.csv(cluster_9_16_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_development.csv",row.names=FALSE)
#### Clusters 1-2: Adhesion
cluster_1_2 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 1|
GO.cluster == 2) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_1_2], firstSigNodes = length(cluster_1_2), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 22
## Number of Edges = 25
##
## $complete.dag
## [1] "A graph with 22 nodes."
cluster_1_2_genes <- unique(unlist(genes_by_GO[cluster_1_2]))
cluster_1_2_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_1_2_genes)
write.csv(cluster_1_2_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_adhesion.csv",row.names=FALSE)
#### Clusters 3_4_6: Stimulus
cluster_3_4_6 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 3|
GO.cluster == 4|
GO.cluster == 6) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_3_4_6], firstSigNodes = length(cluster_3_4_6), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 107
## Number of Edges = 187
##
## $complete.dag
## [1] "A graph with 107 nodes."
cluster_3_4_6_genes <- unique(unlist(genes_by_GO[cluster_3_4_6]))
cluster_3_4_6_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_3_4_6_genes)
write.csv(cluster_3_4_6_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_stimulus_immune.csv",row.names=FALSE)
#### Cluster 3
cluster_3 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 3) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_3], firstSigNodes = length(cluster_3), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 35
## Number of Edges = 65
##
## $complete.dag
## [1] "A graph with 35 nodes."
cluster_3_genes <- unique(unlist(genes_by_GO[cluster_3]))
cluster_3_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_3_genes)
write.csv(cluster_3_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_cluster3.csv",row.names=FALSE)
#### Cluster 4
cluster_4 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 4) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_4], firstSigNodes = length(cluster_4), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 30
## Number of Edges = 49
##
## $complete.dag
## [1] "A graph with 30 nodes."
cluster_4_genes <- unique(unlist(genes_by_GO[cluster_4]))
cluster_4_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_4_genes)
write.csv(cluster_4_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_cluster4.csv",row.names=FALSE)
#### Cluster 6
cluster_6 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 6) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_6], firstSigNodes = length(cluster_6), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 55
## Number of Edges = 87
##
## $complete.dag
## [1] "A graph with 55 nodes."
cluster_6_genes <- unique(unlist(genes_by_GO[cluster_6]))
cluster_6_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_6_genes)
write.csv(cluster_6_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_cluster6.csv",row.names=FALSE)
#### Cluster 5
cluster_5 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 5) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_5], firstSigNodes = length(cluster_5), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 57
## Number of Edges = 115
##
## $complete.dag
## [1] "A graph with 57 nodes."
cluster_5_genes <- unique(unlist(genes_by_GO[cluster_5]))
cluster_5_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_5_genes)
write.csv(cluster_5_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_cluster5.csv",row.names=FALSE)
#### Cluster 7
cluster_7 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 7) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_7], firstSigNodes = length(cluster_7), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 53
## Number of Edges = 104
##
## $complete.dag
## [1] "A graph with 53 nodes."
cluster_7_genes <- unique(unlist(genes_by_GO[cluster_7]))
cluster_7_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_7_genes)
write.csv(cluster_7_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_cluster7.csv",row.names=FALSE)
#### Cluster 8
cluster_8 <- Aboral_enriched_clustered %>% dplyr::filter(GO.cluster == 8) %>% pull(GO.ID)
showSigOfNodes(BP_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_8], firstSigNodes = length(cluster_8), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 56
## Number of Edges = 67
##
## $complete.dag
## [1] "A graph with 56 nodes."
cluster_8_genes <- unique(unlist(genes_by_GO[cluster_8]))
cluster_8_genes_DE <- DE_05_SwissProt %>% filter(query %in%cluster_8_genes)
write.csv(cluster_8_genes,"../output_RNA/differential_expression/semantic-enrichment/cluster_gene_lists/Aboral_enrich_BP_cluster8.csv",row.names=FALSE)
#DE_05_SwissProt %>% filter(query %in%unique(unlist(genes_by_GO[Aboral_enriched_clustered %>% pull(GO.ID)])))
clustered_Oral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Oral@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Oral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
Cluster.term = str_replace(cluster_full,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
dplyr::select(cluster, Cluster.name)
clustered_Oral_DEGs_enrichedGO <- clustered_Oral_DEGs_enrichedGO %>%
left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
mutate(Tissue="Oral")
clustered_Aboral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Aboral@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Aboral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
Cluster.term = str_replace(cluster_full,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
dplyr::select(cluster, Cluster.name)
clustered_Aboral_DEGs_enrichedGO <- clustered_Aboral_DEGs_enrichedGO %>%
left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
mutate(Tissue="Aboral")
clustered_allDEGs_enrichedGO <- rbind(clustered_Oral_DEGs_enrichedGO %>%
dplyr::select(-contains("Oral")),
clustered_Aboral_DEGs_enrichedGO %>%
dplyr::select(-contains("Aboral")))
library(ggdendro)
library(patchwork)
dist_mat <- slot(Wang_clusters_wardD2_Oral, "clusters_dist")[["BMA"]]
hc <- hclust(dist_mat, method = "ward.D2")
dend <- as.dendrogram(hc)
dend <- dendextend::rotate(dend,c(2,3,1,4,5,10,11,13,14,12,7,8,9,6))
dend_data <- ggdendro::dendro_data(dend, type = "rectangle")
dend_labels_oral <- dend_data$labels %>%
mutate(
cluster = str_extract(label, "\\d+"),
Cluster.name = str_replace(label,".*?_.*?_",""),
Cluster.term = str_replace(label,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name))
bar_plot_oral <- clustered_allDEGs_enrichedGO %>%
filter(Tissue == "Oral") %>%
arrange(as.numeric(GO.cluster)) %>%
mutate(Cluster_label = factor(Cluster.name, levels = rev(dend_labels_oral$Cluster.name))) %>%
ggplot(aes(x = Cluster_label)) +
stat_count(width = .8, fill="#FD8D3C", alpha = 0.8) +
coord_flip() +
theme_classic(base_size = 9) +
scale_y_continuous(
breaks = scales::breaks_width(2),
expand = c(0, 0),
limits = c(0, NA)
) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "none",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(y = "Terms in Cluster")
dend_plot_oral <- ggplot() +
geom_segment(
data = dend_data$segments,
aes(x = y, y = x, xend = yend, yend = xend),
linewidth = 0.6,
color = "#2C3E50"
) +
scale_y_reverse(
breaks = seq_along(dend_labels_oral$Cluster.name),
expand = c(0.02, 0.02)
) +
scale_x_reverse(expand = c(0.02, 0)) +
theme_void()
# Combine plots with better proportions
final_oral_dend <- wrap_elements(
dend_plot_oral + bar_plot_oral + plot_layout(widths = c(0.8, 3.5)) + theme(plot.margin = margin(0, 0, 0, 0)))
Wang_clusters_wardD2_Oral@heatmap$GOclusters_static
ggsave("../output_RNA/differential_expression/semantic-enrichment/BP_Dend_Clusters_Oral.png", final_oral_dend, width = 5, height = 6.5, dpi = 300)
library(ggdendro)
dist_mat <- slot(Wang_clusters_wardD2_Aboral, "clusters_dist")[["BMA"]]
hc <- hclust(dist_mat, method = "ward.D2")
dend <- as.dendrogram(hc)
dend <- dendextend::rotate(dend,c(1:8,11,12,9:10,13:16))
dend_data <- ggdendro::dendro_data(dend, type = "rectangle")
dend_labels_aboral <- dend_data$labels %>%
mutate(
cluster = str_extract(label, "\\d+"),
Cluster.name = str_replace(label,".*?_.*?_",""),
Cluster.term = str_replace(label,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name))
bar_plot_aboral <- clustered_allDEGs_enrichedGO %>%
filter(Tissue == "Aboral") %>%
arrange(as.numeric(GO.cluster)) %>%
mutate(Cluster_label = factor(Cluster.name, levels = rev(dend_labels_aboral$Cluster.name))) %>%
ggplot(aes(x = Cluster_label)) +
stat_count(width = .8, fill="#756BB1", alpha = 0.8) +
coord_flip() +
theme_classic(base_size = 9) +
scale_y_continuous(
breaks = scales::breaks_width(2),
expand = c(0, 0),
limits = c(0, NA)
) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "none",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(y = "Terms in Cluster")
dend_plot_aboral <- ggplot() +
geom_segment(
data = dend_data$segments,
aes(x = y, y = x, xend = yend, yend = xend),
linewidth = 0.6,
color = "#2C3E50"
) +
scale_y_reverse(
breaks = seq_along(dend_labels_aboral$Cluster.name),
expand = c(0.02, 0.02)
) +
scale_x_reverse(expand = c(0.02, 0)) +
theme_void() #+
#theme(
# plot.margin = margin(t = 30, r = 0, b = 22, l = 5)
#)
# Combine plots with better proportions
final_aboral_dend <- wrap_elements(
dend_plot_aboral + bar_plot_aboral + plot_layout(widths = c(0.8, 3.5)) + theme(plot.margin = margin(0, 0, 0, 0)))
Wang_clusters_wardD2_Oral@heatmap$GOclusters_static
ggsave("../output_RNA/differential_expression/semantic-enrichment/BP_Dend_Clusters_Aboral.png", final_aboral_dend, width = 5, height = 6.5, dpi = 300)
p1 <- clustered_allDEGs_enrichedGO %>%
filter(Tissue == "Aboral") %>%
arrange(as.numeric(GO.cluster)) %>%
mutate(Cluster_label = factor(Cluster.name, levels = rev(unique(Cluster.name)))) %>%
ggplot(aes(x = Cluster_label)) +
stat_count(width = .8, fill="#756BB1", alpha = 0.8) +
coord_flip() +
scale_y_continuous(breaks = scales::breaks_width(2)) +
theme_bw(base_size = 9) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "none",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(subtitle = "Aboral", y = "Terms in Cluster")
p2 <- clustered_allDEGs_enrichedGO %>%
filter(Tissue == "Oral") %>%
arrange(as.numeric(GO.cluster)) %>%
mutate(Cluster_label = factor(Cluster.name, levels = rev(unique(Cluster.name)))) %>%
ggplot(aes(x = Cluster_label)) +
stat_count(width = .8, fill="#FD8D3C", alpha = 0.8) +
coord_flip() +
theme_bw(base_size = 9) +
scale_y_continuous(breaks = scales::breaks_width(2)) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "none",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(subtitle = "Oral epidermis", y = "Terms in Cluster")
plot <- p1 + p2 +
plot_annotation(
title = "Semantic Similarity Clusters of All Enriched (p < 0.01) BP GO Terms",
theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/BP_Clusters.png", plot, width = 7, height = 6.5, dpi = 300)
top_Oral_per_Cluster <- clustered_Oral_DEGs_enrichedGO %>% group_by(GO.cluster) %>% arrange(Oral_elim.pvalue) %>% slice(1) %>% ungroup() %>% dplyr::select(GO.cluster,GO.ID,term) %>% rename("top_sig_GO" = `GO.ID`,"top_sig_term" = `term`)
plotting_oral <- left_join(clustered_Oral_DEGs_enrichedGO,top_Oral_per_Cluster) %>%
arrange(as.numeric(GO.cluster)) %>%
mutate(is_top = GO.ID == top_sig_GO,
pvalue = Oral_elim.pvalue,
term_wrapped = str_wrap(`term`, width = 25),
cluster_label = factor(str_wrap(Cluster.name,
width = 20), levels = unique(str_wrap(Cluster.name,
width = 20)))) %>%
add_count(GO.cluster, name = "n_terms")
## Joining with `by = join_by(GO.cluster)`
top_Aboral_per_Cluster <- clustered_Aboral_DEGs_enrichedGO %>% group_by(GO.cluster) %>% arrange(Aboral_elim.pvalue) %>% slice(1) %>% ungroup() %>% dplyr::select(GO.cluster,GO.ID,term) %>% rename("top_sig_GO" = `GO.ID`,"top_sig_term" = `term`)
plotting_aboral <- left_join(clustered_Aboral_DEGs_enrichedGO,top_Aboral_per_Cluster)%>%
arrange(as.numeric(GO.cluster)) %>%
mutate(is_top = GO.ID == top_sig_GO,
pvalue = Aboral_elim.pvalue,
term_wrapped = str_wrap(`term`, width = 25),
cluster_label = factor(str_wrap(Cluster.name,
width = 20), levels = unique(str_wrap(Cluster.name,
width = 20)))) %>%
add_count(GO.cluster, name = "n_terms")
## Joining with `by = join_by(GO.cluster)`
combined_plotting <- rbind(plotting_oral %>%
dplyr::select(-contains("Oral")),
plotting_aboral %>%
dplyr::select(-contains("Aboral")))
library(ggrepel)
p_oral <-ggplot(plotting_oral,
aes(x = `Oral_elim.-log10_pvalue`,
y = reorder(GO.ID, `Oral_elim.-log10_pvalue`),
size = `Oral_elim.-log10_pvalue`)) +
geom_segment(aes(xend = 0, yend = reorder(GO.ID, `Oral_elim.-log10_pvalue`)),
alpha = 0.4, linewidth = 0.6, color = "#FD8D3C") +
geom_point(alpha = 0.7, color = "#FD8D3C") +
facet_grid(cluster_label ~ .,
scales = "free_y", space = "free_y") +
geom_text_repel(data = plotting_oral %>% filter(is_top),
aes(label = str_wrap(term, width = 20)),
size = 2,
hjust = 0,
nudge_x = 0.5,
nudge_y = -0.5,
direction = "y",
color = "black",
segment.color = "gray50",
segment.size = 0.5,
max.overlaps = 30) +
labs(title = "Oral Epidermis Tissue",
x = "-log10(p-value)",
y = NULL) +
theme_minimal(base_size = 11) +
theme(plot.title = element_text(face = "bold", size = 12),
axis.text.y = element_text(size = 7.5),
strip.text.y = element_text(size = 7.5, angle = 0, hjust=0,face = "bold"),
legend.position = "none",
panel.spacing = unit(.15, "lines"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank()) +
scale_size_continuous(range = c(2, 5))
p_aboral <- ggplot(plotting_aboral,
aes(x = `Aboral_elim.-log10_pvalue`,
y = reorder(GO.ID, `Aboral_elim.-log10_pvalue`),
color = factor(GO.cluster),
size = `Aboral_elim.-log10_pvalue`)) +
geom_segment(aes(xend = 0, yend = reorder(GO.ID, `Aboral_elim.-log10_pvalue`)),
alpha = 0.4, linewidth = 0.6, color = "#756BB1") +
geom_point(alpha = 0.7, color = "#756BB1") +
facet_grid(cluster_label ~ .,
scales = "free_y", space = "free_y") +
geom_text_repel(data = plotting_aboral %>% filter(is_top),
aes(label = str_wrap(term, width = 20)),
size = 2,
hjust = 0,
nudge_x = 0.5,
nudge_y = -0.5,
direction = "y",
color = "black",
segment.color = "gray50",
segment.size = 0.5,
max.overlaps = 30) +
labs(title = "Aboral Tissue",
x = "-log10(p-value)",
y = NULL) +
theme_minimal(base_size = 11) +
theme(plot.title = element_text(face = "bold", size = 12),
axis.text.y = element_text(size = 7.5),
strip.text.y = element_text(size = 7.5, angle = 0, hjust=0,face = "bold"),
legend.position = "none",
panel.spacing = unit(.15, "lines"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank()) +
scale_size_continuous(range = c(2, 5))
p2 <- p_oral + p_aboral +
plot_annotation(
title = "Tissue-specific GO term enrichment patterns",
theme = theme(plot.title = element_text(face = "bold", size = 14))
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_sig_BP_terms_by_cluster_colored.pdf", p2, width = 14, height = 16)
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_sig_BP_terms_by_cluster_colored2.png", p2, width = 8, height = 12, dpi = 300)
layout <- "
AABB
AABB
AABB
AABB
CDBB
EFBB
"
final_fig <- free(p_oral) + free(p_aboral) +
dend_plot_oral + bar_plot_oral +
dend_plot_aboral + bar_plot_aboral +
plot_layout(design = layout, widths = c(1.5, 8,8,8), axes = "collect_x") +
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(size = 12, face = "bold"))
final_fig <- final_fig + plot_annotation(tag_levels = list(c("A", "B", "C", "", "D", "")))
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_sig_BP_terms_by_cluster_FULL.png", final_fig, width = 12, height = 16, dpi = 300)
top_Oral_per_Cluster <- clustered_Oral_DEGs_enrichedGO %>% group_by(GO.cluster) %>% arrange(Oral_elim.pvalue) %>% slice(1) %>% ungroup() %>% dplyr::select(GO.cluster,GO.ID,term) %>% rename("top_sig_GO" = `GO.ID`,"top_sig_term" = `term`)
plotting_oral <- left_join(clustered_Oral_DEGs_enrichedGO,top_Oral_per_Cluster) %>%
arrange(match(GO.cluster,dend_labels_oral$cluster)) %>%
mutate(is_top = GO.ID == top_sig_GO,
pvalue = Oral_elim.pvalue,
term_wrapped = str_wrap(`term`, width = 25),
cluster_label = factor(str_wrap(Cluster.name,
width = 20), levels = unique(str_wrap(Cluster.name,
width = 20)))) %>%
add_count(GO.cluster, name = "n_terms")
## Joining with `by = join_by(GO.cluster)`
top_Aboral_per_Cluster <- clustered_Aboral_DEGs_enrichedGO %>% group_by(GO.cluster) %>% arrange(Aboral_elim.pvalue) %>% slice(1) %>% ungroup() %>% dplyr::select(GO.cluster,GO.ID,term) %>% rename("top_sig_GO" = `GO.ID`,"top_sig_term" = `term`)
plotting_aboral <- left_join(clustered_Aboral_DEGs_enrichedGO,top_Aboral_per_Cluster)%>%
arrange(match(GO.cluster,dend_labels_aboral$cluster)) %>%
mutate(is_top = GO.ID == top_sig_GO,
pvalue = Aboral_elim.pvalue,
term_wrapped = str_wrap(`term`, width = 25),
cluster_label = factor(str_wrap(Cluster.name,
width = 20), levels = unique(str_wrap(Cluster.name,
width = 20)))) %>%
add_count(GO.cluster, name = "n_terms")
## Joining with `by = join_by(GO.cluster)`
combined_plotting <- rbind(plotting_oral %>%
dplyr::select(-contains("Oral")),
plotting_aboral %>%
dplyr::select(-contains("Aboral")))
p_oral <-ggplot(plotting_oral,
aes(x = `Oral_elim.-log10_pvalue`,
y = reorder(GO.ID, `Oral_elim.-log10_pvalue`),
size = `Oral_elim.-log10_pvalue`)) +
geom_segment(aes(xend = 0, yend = reorder(GO.ID, `Oral_elim.-log10_pvalue`)),
alpha = 0.4, linewidth = 0.6, color = "#FD8D3C") +
geom_point(alpha = 0.7, color = "#FD8D3C") +
facet_grid(cluster_label ~ .,
scales = "free_y", space = "free_y",switch = "y") +
geom_text_repel(data = plotting_oral %>% filter(is_top),
aes(label = str_wrap(term, width = 20)),
size = 2,
hjust = 0,
nudge_x = 0.5,
nudge_y = -0.5,
direction = "y",
color = "black",
segment.color = "gray50",
segment.size = 0.5,
max.overlaps = 30) +
labs(#title = "Oral Epidermis Tissue",
x = "-log10(p-value)",
y = NULL) +
theme_minimal(base_size = 11) +
theme(plot.title = element_text(face = "bold", size = 12),
axis.text.y = element_text(size = 7.5),
strip.text.y.left = element_text(size = 7.5, angle = 0, hjust = 0.5, face = "bold"),
strip.placement = "outside",
strip.background.y = element_rect(
fill = "white",
color = "black",
linewidth = 0.6
),
legend.position = "none",
panel.spacing = unit(.15, "lines"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank()) +
scale_size_continuous(range = c(2, 5))
p_aboral <- ggplot(plotting_aboral,
aes(x = `Aboral_elim.-log10_pvalue`,
y = reorder(GO.ID, `Aboral_elim.-log10_pvalue`),
color = factor(GO.cluster),
size = `Aboral_elim.-log10_pvalue`)) +
geom_segment(aes(xend = 0, yend = reorder(GO.ID, `Aboral_elim.-log10_pvalue`)),
alpha = 0.4, linewidth = 0.6, color = "#756BB1") +
geom_point(alpha = 0.7, color = "#756BB1") +
facet_grid(cluster_label ~ .,
scales = "free_y", space = "free_y",switch = "y") +
geom_text_repel(data = plotting_aboral %>% filter(is_top),
aes(label = str_wrap(term, width = 20)),
size = 2,
hjust = 0,
nudge_x = 0.5,
nudge_y = -0.5,
direction = "y",
color = "black",
segment.color = "gray50",
segment.size = 0.5,
max.overlaps = 30) +
labs(#title = "Aboral Tissue",
x = "-log10(p-value)",
y = NULL) +
theme_minimal(base_size = 11) +
theme(plot.title = element_text(face = "bold", size = 12),
axis.text.y = element_text(size = 7.5),
strip.text.y.left = element_text(size = 7.5, angle = 0, hjust = 0.5, face = "bold"),
strip.placement = "outside",
strip.background.y = element_rect(
fill = "white",
color = "black",
linewidth = 0.6
),
legend.position = "none",
panel.spacing = unit(.15, "lines"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank()) +
scale_size_continuous(range = c(2, 5))
final_fig <- dend_plot_oral + p_oral + dend_plot_aboral + p_aboral +
plot_layout(nrow=1,widths=c(1,4.5,1,4.5),heights =c(1,4.5,1,4.5)) +
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(size = 12, face = "bold"))
final_fig <- final_fig + plot_annotation(tag_levels = list(c("A", "", "B", "")))
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_sig_BP_terms_by_cluster_FULL_dend.png", final_fig, width = 8, height = 12, dpi = 300)
library(dplyr)
library(tidyr)
library(ggplot2)
# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
dplyr::select(Cluster.name, Tissue) %>%
group_by(Cluster.name) %>%
summarise(
Oral_terms = sum(Tissue=="Oral"),
Aboral_terms = sum(Tissue=="Aboral"),
.groups = "drop"
) %>%
mutate(
deviation = Aboral_terms - Oral_terms,
Oral_terms = -Oral_terms # Make negative for bidirectional plot
) %>%
pivot_longer(cols = c("Oral_terms", "Aboral_terms"),
names_to = "Tissue", values_to = "n_terms") %>%
mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
distinct(Cluster.name, deviation) %>%
arrange(deviation)
oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5
# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
geom_vline(xintercept = c(oral_to_equal, equal_to_aboral),
color = "grey60", linetype = "dashed", linewidth = 0.6) +
scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
coord_flip() +
theme_minimal() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12)
) +
scale_y_continuous(labels = abs) +
labs(
x = "Gene clusters",
y = "Number of enriched GO terms",
title = "Tissue-specific GO term enrichment patterns",
subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_clustered_separate.png", p, width = 10, height = 8, dpi = 300)
# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)
CC_Oral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="CC",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 1530 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1759 GO terms and 2893 relations. )
##
## Annotating nodes ...............
## ( 10114 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
CC_Oral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 615 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 12: 1 nodes to be scored (0 eliminated genes)
##
## Level 11: 22 nodes to be scored (0 eliminated genes)
##
## Level 10: 60 nodes to be scored (0 eliminated genes)
##
## Level 9: 83 nodes to be scored (6 eliminated genes)
##
## Level 8: 98 nodes to be scored (12 eliminated genes)
##
## Level 7: 97 nodes to be scored (54 eliminated genes)
##
## Level 6: 96 nodes to be scored (336 eliminated genes)
##
## Level 5: 67 nodes to be scored (761 eliminated genes)
##
## Level 4: 44 nodes to be scored (772 eliminated genes)
##
## Level 3: 44 nodes to be scored (772 eliminated genes)
##
## Level 2: 2 nodes to be scored (3565 eliminated genes)
##
## Level 1: 1 nodes to be scored (3565 eliminated genes)
CC_Results <- ViSEAGO::merge_enrich_terms(
Input = list( Oral_elim = c("CC_Oral", "elim_Oral")))
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
CC_Results
## - object class: enrich_GO_terms
## - ontology: CC
## - method: topGO
## - summary:
## Oral_elim
## CC_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 825
## feasible_genes: 10114
## feasible_genes_significant: 773
## genes_nodeSize: 5
## nodes_number: 968
## edges_number: 1611
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 968
## GO_significant: 19
## feasible_genes: 10114
## feasible_genes_significant: 773
## genes_nodeSize: 5
## Nontrivial_nodes: 615
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 19 GO terms of 1 conditions.
## Oral_elim : 19 terms
ViSEAGO::show_table(CC_Results)
# print the merged table in a file
ViSEAGO::show_table(
CC_Results,
"../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Oral.tsv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=CC_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: CC
## - method: topGO
## - summary:
## Oral_elim
## CC_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 825
## feasible_genes: 10114
## feasible_genes_significant: 773
## genes_nodeSize: 5
## nodes_number: 968
## edges_number: 1611
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 968
## GO_significant: 19
## feasible_genes: 10114
## feasible_genes_significant: 773
## genes_nodeSize: 5
## Nontrivial_nodes: 615
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 19 GO terms of 1 conditions.
## Oral_elim : 19 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Oral <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Oral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2_Oral,
"../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Oral_cluster_heatmap_Wang_wardD2.tsv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Oral,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Oral<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2_Oral,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Oral,
"GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Oral <-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2_Oral,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
"GOclusters")
# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)
CC_Aboral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="CC",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 1530 GO terms found. )
##
## Build GO DAG topology ..........
## ( 1759 GO terms and 2893 relations. )
##
## Annotating nodes ...............
## ( 10114 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
CC_Aboral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 350 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 11: 6 nodes to be scored (0 eliminated genes)
##
## Level 10: 26 nodes to be scored (0 eliminated genes)
##
## Level 9: 39 nodes to be scored (0 eliminated genes)
##
## Level 8: 52 nodes to be scored (8 eliminated genes)
##
## Level 7: 55 nodes to be scored (43 eliminated genes)
##
## Level 6: 61 nodes to be scored (451 eliminated genes)
##
## Level 5: 48 nodes to be scored (871 eliminated genes)
##
## Level 4: 29 nodes to be scored (1110 eliminated genes)
##
## Level 3: 31 nodes to be scored (3132 eliminated genes)
##
## Level 2: 2 nodes to be scored (3513 eliminated genes)
##
## Level 1: 1 nodes to be scored (3513 eliminated genes)
CC_Results <- ViSEAGO::merge_enrich_terms(
Input = list(Aboral_elim = c("CC_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
CC_Results
## - object class: enrich_GO_terms
## - ontology: CC
## - method: topGO
## - summary:
## Aboral_elim
## CC_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 376
## feasible_genes: 10114
## feasible_genes_significant: 365
## genes_nodeSize: 5
## nodes_number: 968
## edges_number: 1611
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 968
## GO_significant: 26
## feasible_genes: 10114
## feasible_genes_significant: 365
## genes_nodeSize: 5
## Nontrivial_nodes: 350
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 26 GO terms of 1 conditions.
## Aboral_elim : 26 terms
ViSEAGO::show_table(CC_Results)
# print the merged table in a file
ViSEAGO::show_table(
CC_Results,
"../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Aboral.tsv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=CC_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: CC
## - method: topGO
## - summary:
## Aboral_elim
## CC_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 376
## feasible_genes: 10114
## feasible_genes_significant: 365
## genes_nodeSize: 5
## nodes_number: 968
## edges_number: 1611
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 968
## GO_significant: 26
## feasible_genes: 10114
## feasible_genes_significant: 365
## genes_nodeSize: 5
## Nontrivial_nodes: 350
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 26 GO terms of 1 conditions.
## Aboral_elim : 26 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Aboral <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Aboral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2_Aboral,
"../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Aboral_cluster_heatmap_Wang_wardD2.tsv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Aboral,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Aboral<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2_Aboral,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Aboral,
"GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Aboral<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2_Aboral,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOclusters")
clustered_Oral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Oral@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Oral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
Cluster.term = str_replace(cluster_full,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
dplyr::select(cluster, Cluster.name)
clustered_Oral_DEGs_enrichedGO <- clustered_Oral_DEGs_enrichedGO %>%
left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
mutate(Tissue="Oral")
clustered_Aboral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Aboral@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Aboral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
Cluster.term = str_replace(cluster_full,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
dplyr::select(cluster, Cluster.name)
clustered_Aboral_DEGs_enrichedGO <- clustered_Aboral_DEGs_enrichedGO %>%
left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
mutate(Tissue="Aboral")
clustered_allDEGs_enrichedGO <- rbind(clustered_Oral_DEGs_enrichedGO %>%
dplyr::select(-contains("Oral")),
clustered_Aboral_DEGs_enrichedGO %>%
dplyr::select(-contains("Aboral")))
library(dplyr)
library(tidyr)
library(ggplot2)
# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
dplyr::select(Cluster.name, Tissue) %>%
group_by(Cluster.name) %>%
summarise(
Oral_terms = sum(Tissue=="Oral"),
Aboral_terms = sum(Tissue=="Aboral"),
.groups = "drop"
) %>%
mutate(
deviation = Aboral_terms - Oral_terms,
Oral_terms = -Oral_terms # Make negative for bidirectional plot
) %>%
pivot_longer(cols = c("Oral_terms", "Aboral_terms"),
names_to = "Tissue", values_to = "n_terms") %>%
mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
distinct(Cluster.name, deviation) %>%
arrange(deviation)
oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5
# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
geom_vline(xintercept = c(oral_to_equal, equal_to_aboral),
color = "grey60", linetype = "dashed", linewidth = 0.6) +
scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
coord_flip() +
theme_minimal() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12)
) +
scale_y_continuous(labels = abs) +
labs(
x = "Gene clusters",
y = "Number of enriched GO terms",
title = "Tissue-specific GO term enrichment patterns",
subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/CC_GO_enrichment_bidirectional_clustered_separate.png", p, width = 10, height = 8, dpi = 300)
# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)
MF_Oral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="MF",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 3418 GO terms found. )
##
## Build GO DAG topology ..........
## ( 3992 GO terms and 5216 relations. )
##
## Annotating nodes ...............
## ( 9105 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
MF_Oral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 980 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 12: 2 nodes to be scored (0 eliminated genes)
##
## Level 11: 8 nodes to be scored (0 eliminated genes)
##
## Level 10: 19 nodes to be scored (32 eliminated genes)
##
## Level 9: 44 nodes to be scored (32 eliminated genes)
##
## Level 8: 87 nodes to be scored (327 eliminated genes)
##
## Level 7: 136 nodes to be scored (422 eliminated genes)
##
## Level 6: 203 nodes to be scored (1035 eliminated genes)
##
## Level 5: 223 nodes to be scored (1092 eliminated genes)
##
## Level 4: 188 nodes to be scored (1475 eliminated genes)
##
## Level 3: 51 nodes to be scored (1479 eliminated genes)
##
## Level 2: 18 nodes to be scored (1479 eliminated genes)
##
## Level 1: 1 nodes to be scored (1479 eliminated genes)
MF_Results <- ViSEAGO::merge_enrich_terms(
Input = list( Oral_elim = c("MF_Oral", "elim_Oral")))
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
MF_Results
## - object class: enrich_GO_terms
## - ontology: MF
## - method: topGO
## - summary:
## Oral_elim
## MF_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 825
## feasible_genes: 9105
## feasible_genes_significant: 707
## genes_nodeSize: 5
## nodes_number: 1477
## edges_number: 1949
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 1477
## GO_significant: 28
## feasible_genes: 9105
## feasible_genes_significant: 707
## genes_nodeSize: 5
## Nontrivial_nodes: 980
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 28 GO terms of 1 conditions.
## Oral_elim : 28 terms
ViSEAGO::show_table(MF_Results)
# print the merged table in a file
ViSEAGO::show_table(
MF_Results,
"../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Oral.tsv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=MF_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: MF
## - method: topGO
## - summary:
## Oral_elim
## MF_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 825
## feasible_genes: 9105
## feasible_genes_significant: 707
## genes_nodeSize: 5
## nodes_number: 1477
## edges_number: 1949
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 1477
## GO_significant: 28
## feasible_genes: 9105
## feasible_genes_significant: 707
## genes_nodeSize: 5
## Nontrivial_nodes: 980
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 28 GO terms of 1 conditions.
## Oral_elim : 28 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Oral <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Oral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2_Oral,
"../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Oral_cluster_heatmap_Wang_wardD2.tsv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Oral,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Oral<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2_Oral,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Oral,
"GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Oral <-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2_Oral,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
"GOclusters")
# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)
MF_Aboral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="MF",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 3418 GO terms found. )
##
## Build GO DAG topology ..........
## ( 3992 GO terms and 5216 relations. )
##
## Annotating nodes ...............
## ( 9105 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
MF_Aboral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 581 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 11: 3 nodes to be scored (0 eliminated genes)
##
## Level 10: 6 nodes to be scored (0 eliminated genes)
##
## Level 9: 17 nodes to be scored (0 eliminated genes)
##
## Level 8: 41 nodes to be scored (30 eliminated genes)
##
## Level 7: 79 nodes to be scored (141 eliminated genes)
##
## Level 6: 117 nodes to be scored (698 eliminated genes)
##
## Level 5: 135 nodes to be scored (954 eliminated genes)
##
## Level 4: 124 nodes to be scored (1012 eliminated genes)
##
## Level 3: 42 nodes to be scored (1336 eliminated genes)
##
## Level 2: 16 nodes to be scored (1791 eliminated genes)
##
## Level 1: 1 nodes to be scored (1791 eliminated genes)
MF_Results <- ViSEAGO::merge_enrich_terms(
Input = list(Aboral_elim = c("MF_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
MF_Results
## - object class: enrich_GO_terms
## - ontology: MF
## - method: topGO
## - summary:
## Aboral_elim
## MF_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 376
## feasible_genes: 9105
## feasible_genes_significant: 330
## genes_nodeSize: 5
## nodes_number: 1477
## edges_number: 1949
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 1477
## GO_significant: 27
## feasible_genes: 9105
## feasible_genes_significant: 330
## genes_nodeSize: 5
## Nontrivial_nodes: 581
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 27 GO terms of 1 conditions.
## Aboral_elim : 27 terms
ViSEAGO::show_table(MF_Results)
# print the merged table in a file
ViSEAGO::show_table(
MF_Results,
"../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Aboral.tsv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=MF_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: MF
## - method: topGO
## - summary:
## Aboral_elim
## MF_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10652
## available_genes_significant: 376
## feasible_genes: 9105
## feasible_genes_significant: 330
## genes_nodeSize: 5
## nodes_number: 1477
## edges_number: 1949
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 1477
## GO_significant: 27
## feasible_genes: 9105
## feasible_genes_significant: 330
## genes_nodeSize: 5
## Nontrivial_nodes: 581
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 27 GO terms of 1 conditions.
## Aboral_elim : 27 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Aboral <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Aboral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2_Aboral,
"../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Aboral_cluster_heatmap_Wang_wardD2.tsv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Aboral,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Aboral<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2_Aboral,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Aboral,
"GOclusters")
cluster_GOs <- Wang_clusters_wardD2_Aboral@enrich_GOs@data %>% dplyr::filter(GO.cluster == 8) %>% pull(GO.ID)
showSigOfNodes(MF_Aboral, score(elim_Aboral)[names(score(elim_Aboral)) %in% cluster_GOs], firstSigNodes = length(cluster_GOs), useInfo = 'all')
## $dag
## A graphNEL graph with directed edges
## Number of Nodes = 8
## Number of Edges = 8
##
## $complete.dag
## [1] "A graph with 8 nodes."
# GOclusters heatmap
Wang_clusters_wardD2_Aboral<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2_Aboral,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOclusters")
clustered_Oral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Oral@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Oral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
Cluster.term = str_replace(cluster_full,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
dplyr::select(cluster, Cluster.name)
clustered_Oral_DEGs_enrichedGO <- clustered_Oral_DEGs_enrichedGO %>%
left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
mutate(Tissue="Oral")
clustered_Aboral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Aboral@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Aboral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_wrap(str_replace(cluster_full,".*?_.*?_",""), width = 60),
Cluster.term = str_replace(cluster_full,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
dplyr::select(cluster, Cluster.name)
clustered_Aboral_DEGs_enrichedGO <- clustered_Aboral_DEGs_enrichedGO %>%
left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
mutate(Tissue="Aboral")
clustered_allDEGs_enrichedGO <- rbind(clustered_Oral_DEGs_enrichedGO %>%
dplyr::select(-contains("Oral")),
clustered_Aboral_DEGs_enrichedGO %>%
dplyr::select(-contains("Aboral")))
library(dplyr)
library(tidyr)
library(ggplot2)
# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
dplyr::select(Cluster.name, Tissue) %>%
group_by(Cluster.name) %>%
summarise(
Oral_terms = sum(Tissue=="Oral"),
Aboral_terms = sum(Tissue=="Aboral"),
.groups = "drop"
) %>%
mutate(
deviation = Aboral_terms - Oral_terms,
Oral_terms = -Oral_terms # Make negative for bidirectional plot
) %>%
pivot_longer(cols = c("Oral_terms", "Aboral_terms"),
names_to = "Tissue", values_to = "n_terms") %>%
mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
distinct(Cluster.name, deviation) %>%
arrange(deviation)
oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5
# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
geom_vline(xintercept = c(oral_to_equal, equal_to_aboral),
color = "grey60", linetype = "dashed", linewidth = 0.6) +
scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
coord_flip() +
theme_minimal() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12)
) +
scale_y_continuous(labels = abs) +
labs(
x = "Gene clusters",
y = "Number of enriched GO terms",
title = "Tissue-specific GO term enrichment patterns",
subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/MF_GO_enrichment_bidirectional_clustered_separate.png", p, width = 10, height = 8, dpi = 300)
BP_GOs_Aboral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral.tsv") %>% mutate(ontology="BP")
MF_GOs_Aboral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Aboral.tsv") %>% mutate(ontology="MF")
CC_GOs_Aboral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Aboral.tsv") %>% mutate(ontology="CC")
BP_GOs_Oral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral.tsv") %>% mutate(ontology="BP")
MF_GOs_Oral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/MF_DE_05_Oral.tsv") %>% mutate(ontology="MF")
CC_GOs_Oral <- read.delim(file="../output_RNA/differential_expression/semantic-enrichment/CC_DE_05_Oral.tsv") %>% mutate(ontology="CC")
oral_combined <- rbind(BP_GOs_Oral,MF_GOs_Oral,CC_GOs_Oral) %>%
rename_with(~ str_replace_all(.x, "Oral_elim|\\.", "")) %>% mutate(Tissue="Oral")
aboral_combined <- rbind(BP_GOs_Aboral,MF_GOs_Aboral,CC_GOs_Aboral) %>%
rename_with(~ str_replace_all(.x, "Aboral_elim|\\.", "")) %>% mutate(Tissue="Aboral")
tissues_combined <- rbind(oral_combined,aboral_combined) %>%
mutate(freq = as.numeric(str_remove(genes_frequency,pattern="\\%.*")))
aboral_cols <- c(
BP = "#756BB1",
CC = "#9E9AC8",
MF = "#CBC9E2"
)
oral_cols <- c(
BP = "#FD8D3C",
CC = "#FDAE6B",
MF = "#FDD0A2"
)
top10_pval <- tissues_combined %>%
filter(ontology=="BP") %>%
group_by(Tissue,ontology) %>%
slice_max(order_by = desc(pvalue), n = 30, with_ties = FALSE) %>%
arrange(Tissue, ontology, log10_pvalue) %>%
ungroup() %>%
mutate(term=str_wrap(`term`, width = 40))
p1 <- top10_pval %>% filter(Tissue == "Aboral") %>%
mutate(term = fct_reorder(term, log10_pvalue)) %>%
ggplot(aes(x = term, y = log10_pvalue, fill = ontology)) +
geom_col(width = .8, fill="#756BB1", alpha = 0.8) +
facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
coord_flip() +
# scale_fill_manual(values = aboral_cols) +
theme_bw(base_size = 9) +
#scale_y_continuous(limits = c(0, 35), expand = expansion(mult = c(0, 0.05)))+
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "none",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(subtitle = "Aboral", y = "-log10(p-value)")
p2 <- top10_pval %>% filter(Tissue == "Oral") %>%
mutate(term = fct_reorder(term, log10_pvalue)) %>%
ggplot(aes(x = term, y = log10_pvalue, fill = ontology)) +
geom_col(width = .8, fill="#FD8D3C", alpha = 0.8) +
facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
coord_flip() +
# scale_fill_manual(values = oral_cols) +
# scale_y_continuous(limits = c(0, 35), expand = expansion(mult = c(0, 0.05)))+
theme_bw(base_size = 9) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "none",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(subtitle = "Oral epidermis", y = "-log10(p-value)")
plot <- p1 + p2 +
plot_annotation(
title = "Top 30 enriched GO terms by p-value",
theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/Top_BP_30.png", plot, width = 7, height = 6.5, dpi = 300)
top10_freq <- tissues_combined %>% group_by(Tissue,ontology) %>%
slice_max(order_by = freq, n = 10, with_ties = FALSE) %>%
arrange(Tissue, ontology, log10_pvalue) %>%
ungroup() %>%
mutate(term=str_wrap(`term`, width = 40))
p1 <- top10_freq %>% filter(Tissue == "Aboral") %>%
mutate(term = fct_reorder(term, freq)) %>%
ggplot(aes(x = term, y = freq, fill = ontology)) +
geom_col(width = 0.7, alpha = 0.85) +
facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
coord_flip() +
scale_fill_manual(values = aboral_cols) +
theme_bw(base_size = 9) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "none",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(subtitle = "Aboral", y = "% DEGs with GO term of\nall genes with GO term")
p2 <- top10_freq %>% filter(Tissue == "Oral") %>%
mutate(term = fct_reorder(term, freq)) %>%
ggplot(aes(x = term, y = freq, fill = ontology)) +
geom_col(width = 0.7, alpha = 0.85) +
facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
coord_flip() +
scale_fill_manual(values = oral_cols) +
theme_bw(base_size = 9) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7,color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "none",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(subtitle = "Oral epidermis", y = "% DEGs with GO term of\nall genes with GO term")
plot <- p1 + p2 +
plot_annotation(
title = "Top 10 enriched GO terms by frequency in tissue DEGs",
theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_Onts_frq.png", plot, width = 7, height = 6.5, dpi = 300)
# Create gradient scales for each tissue
aboral_gradient <- colorRampPalette(c("#CBC9E2", "#756BB1"))(100)
oral_gradient <- colorRampPalette(c("#FDD0A2", "#FD8D3C"))(100)
p1 <- top10_freq %>% filter(Tissue == "Aboral") %>%
mutate(term = fct_reorder(term, freq)) %>%
ggplot(aes(x = term, y = freq, fill = log10_pvalue)) +
geom_col(width = 0.7, alpha = 0.85) +
facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
coord_flip() +
scale_fill_gradientn(colors = aboral_gradient, name = "-log10(p-value)") +
theme_bw(base_size = 9) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7, color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "right",
legend.key.height = unit(0.8, "cm"),
legend.key.width = unit(0.3, "cm"),
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(subtitle = "Aboral", y = "% DEGs with GO term of\nall genes with GO term")
p2 <- top10_freq %>% filter(Tissue == "Oral") %>%
mutate(term = fct_reorder(term, freq)) %>%
ggplot(aes(x = term, y = freq, fill = log10_pvalue)) +
geom_col(width = 0.7, alpha = 0.85) +
facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
coord_flip() +
scale_fill_gradientn(colors = oral_gradient, name = "-log10(p-value)") +
theme_bw(base_size = 9) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7, color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "right",
legend.key.height = unit(0.8, "cm"),
legend.key.width = unit(0.3, "cm"),
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) + labs(subtitle = "Oral epidermis", y = "% DEGs with GO term of\nall genes with GO term")
plot <- p1 + p2 +
plot_annotation(
title = "Top 10 enriched GO terms by frequency in tissue DEGs",
theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_Onts_freq_pval.png", plot, width = 8, height = 6.5, dpi = 300)
top10_pval <- tissues_combined %>%
group_by(Tissue,ontology) %>%
slice_max(order_by = desc(pvalue), n = 10, with_ties = FALSE) %>%
arrange(Tissue, ontology, log10_pvalue) %>%
ungroup() %>%
mutate(term=str_wrap(`term`, width = 40))
make_lollipop <- function(df, subtitle_label,fill="white") {
df %>%
mutate(term = fct_reorder(term, log10_pvalue)) %>%
ggplot(aes(y = term, x = log10_pvalue)) +
# stick
geom_segment(aes(yend = term, x = 0, xend = log10_pvalue),
color = fill, linewidth = 0.6) +
# dot sized by frequency
geom_point(aes(size = freq, fill = fill),
shape = 21, color = "black") +
facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
scale_fill_manual(values = fill, guide = "none") +
scale_size_continuous(name = "% DEGs\nwith GO term",
limits = c(0, 100),
breaks = c(0, 25, 50, 75, 100),
range = c(2, 6)) +
theme_bw(base_size = 9) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7, color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "bottom",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) +
labs(
subtitle = subtitle_label,
x = expression(-log[10](p-value))
)
}
p1 <- make_lollipop(
top10_pval %>% filter(Tissue == "Aboral"),
"Aboral", fill = "#9E9AC8"
)
p2 <- make_lollipop(
top10_pval %>% filter(Tissue == "Oral"),
"Oral epidermis", fill = "#FDAE6B"
)
plot <- p1 + p2 +
plot_layout(guides = "collect") +
plot_annotation(
title = "Top 10 enriched GO terms by p-value in tissue DEGs",
theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
) &
theme(legend.position='bottom')
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_Onts_pval_top10_lollipop.png", plot, width = 10, height = 6.5, dpi = 300)
top10_pval <- tissues_combined %>%
group_by(Tissue,ontology) %>%
slice_max(order_by = desc(pvalue), n = 20, with_ties = FALSE) %>%
arrange(Tissue, ontology, log10_pvalue) %>%
ungroup() %>%
mutate(term=str_wrap(`term`, width = 40))
make_lollipop <- function(df, subtitle_label,fill="white") {
df %>%
mutate(term = fct_reorder(term, log10_pvalue)) %>%
ggplot(aes(y = term, x = log10_pvalue)) +
# stick
geom_segment(aes(yend = term, x = 0, xend = log10_pvalue),
color = fill, linewidth = 0.6) +
# dot sized by frequency
geom_point(aes(size = freq, fill = fill),
shape = 21, color = "black") +
facet_grid(ontology ~ ., scales = "free_y", space = "free_y") +
scale_fill_manual(values = fill, guide = "none") +
scale_size_continuous(name = "% DEGs\nwith GO term",
limits = c(0, 100),
breaks = c(0, 25, 50, 75, 100),
range = c(2, 6)) +
theme_bw(base_size = 9) +
theme(
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(color = "grey90", linewidth = 0.2),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 7, lineheight = 0.7, color = "black"),
strip.text = element_text(face = "bold", size = 9),
strip.background = element_rect(fill = "white", color = "black"),
legend.position = "bottom",
plot.subtitle = element_text(face = "bold", size = 9, hjust = 0.5),
panel.spacing.y = unit(0.2, "lines")
) +
labs(
subtitle = subtitle_label,
x = expression(-log[10](p-value))
)
}
p1 <- make_lollipop(
top10_pval %>% filter(Tissue == "Aboral"),
"Aboral", fill = "#9E9AC8"
)
p2 <- make_lollipop(
top10_pval %>% filter(Tissue == "Oral"),
"Oral epidermis", fill = "#FDAE6B"
)
plot <- p1 + p2 +
plot_layout(guides = "collect") +
plot_annotation(
title = "Top 10 enriched GO terms by p-value in tissue DEGs",
theme = theme(plot.title = element_text(face = "bold", size = 9, hjust = 0.5))
) &
theme(legend.position='bottom')
ggsave("../output_RNA/differential_expression/semantic-enrichment/All_Onts_pval_top20_lollipop.png", plot, width = 10, height = 16, dpi = 300)